*-------------------------------------------------------------------------------
*						Appendix I Tab 2
*-------------------------------------------------------------------------------

** Project directories (absolute paths; edit here when the project moves)
global Raw_data   "G:\project-finished\Descriptive\Data"
global App_data   "G:\project-finished\Descriptive\Appendix Data"
global Class_data "G:\project-finished\Descriptive\Classification"
global Work_lab   "G:\project-finished\Descriptive\Lab"
global Out_lab    "G:\project-finished\Descriptive\Out"

* Files saved without a directory below are written relative to this folder
cd "$Work_lab"

* Close any log that is still open (no error if none is), then start a new one
capture log close
log using "$Out_lab\Appendix I Tab 2", replace
set more off


**------------------------------------------------------------------------------
* Step1: Generate Data
* 						column (1)-(3) Long Table Census Data
*
* Builds tabledata1 with one row per (year, occ_7) holding num_longtable.
* In every year occ_7 is simply the row position after filtering, so the
* result relies on the row order of the source sheets -- presumably the
* same 7-group order that Step 2 labels; verify against the workbooks.
**------------------------------------------------------------------------------
* Start from an empty file so each year can use the same append-then-save step
clear
set obs 0
save tabledata1, replace emptyok

* ---- Occupation @ 2000 ----
import excel "$App_data\职业和行业就业分布.xlsx", sheet("2000行业-职业交叉")  cellrange(A22:R31) firstrow clear
* Remove rows with an empty label and the row labelled 总计
drop if inlist(职业分类, "", "总计")

keep 总计 职业分类
rename (职业分类 总计) (title num_longtable)
* Strip blanks embedded in the category labels
replace title = subinstr(title, " ", "", .)

generate occ_7 = _n
compress

generate year = 2000
append using tabledata1
save tabledata1, replace

* ---- Occupation @ 2010 ----
import excel "$App_data\职业和行业就业分布.xlsx", sheet("2010职业小类") firstrow clear

* Keep only the rows flagged "大" in 维度
drop if 维度 != "大"
keep 分类 人数
destring 人数, generate(num_longtable)

rename 分类 title
generate occ_7 = _n

keep title occ_7 num_longtable

compress
generate year = 2010
append using tabledata1
save tabledata1, replace

* ---- Occupation @ 2015 ----
import excel "$App_data\01-37 2016中国劳动统计年鉴.xls", sheet("Sheet2") firstrow clear
* Rows where 总计 equals 100 are removed -- presumably the total row of a
* percentage table; confirm against the sheet
drop if 总计 == 100
rename 总计 num_longtable
generate occ_7 = _n

* No title is kept for 2015; Step 2 assigns its own labels from occ_7
keep occ_7 num_longtable

compress
generate year = 2015
append using tabledata1
save tabledata1, replace

**------------------------------------------------------------------------------
* Step1: Generate Data
* 						column (4)-(6) Census Data
*
* Builds tabledata2 with one row per (year, occ_7) holding num, the number
* of observations of the census file that fall in that group.
**------------------------------------------------------------------------------
clear
set obs 0
save tabledata2, replace emptyok

*----------Occ Structure: 2000, 2010 and 2015----------*
foreach yr in "2000" "2010" "2015" {

	use "$Raw_data\census`yr'.dta", clear

	* Year-specific cleaning; every branch ends with a string variable occ
	if `yr' == 2000 {
		drop if occ == 0 | occ == .

		* Adjust occupation category: two-character codes get a leading "0"
		* (presumably restoring a zero lost by numeric storage -- confirm)
		tostring occ, replace
		replace occ = "0" + occ if strlen(occ) == 2
	}
	else if `yr' == 2010 {
		drop if _职业 == .

		* Adjust occupation category, same padding rule as for 2000
		tostring _职业, generate(occ)
		replace occ = "0" + occ if strlen(occ) == 2
	}
	else {
		* Same coverage as the "labor yearbook", so drop those aged below 16
		generate age = 2015 - birth_year
		drop if age < 16 | occ == ""
	}

	* The first character of the code drives the grouping
	generate temp = substr(occ, 1, 1)
	destring temp, replace

	if `yr' == 2015 {
		* First digit is used as the group, except that 8 is folded into 7
		generate occ_7 = cond(temp == 8, 7, temp)
	}
	else {
		generate occ_7 = 1 if temp == 0
		replace occ_7 = 2 if inlist(temp, 1, 2)
		replace occ_7 = temp if inlist(temp, 3, 4, 5)
		replace occ_7 = 6 if inrange(temp, 6, 9)
		* Code "999" starts with 9, so it must be reassigned after the 6-9 rule
		replace occ_7 = 7 if occ == "999"
		drop temp
	}

	* Count the observations in each occ_7 group
	generate num = _n
	collapse (count) num, by(occ_7)
	generate year = `yr'

	append using tabledata2
	save tabledata2, replace
}

**------------------------------------------------------------------------------
* Step2: Display Data
*
* Joins the two Step 1 files on (year, occ_7), converts both count columns
* to within-year percentage shares, and lists one row per occupation group.
**------------------------------------------------------------------------------
use tabledata1, clear
merge 1:1 year occ_7 using tabledata2

* Titles read from the spreadsheets are discarded; a common set is assigned below
drop title _merge

generate title = "国家机关、党群组织、企业、事业单位负责人" if occ_7 == 1
replace  title = "专业技术人员"                             if occ_7 == 2
replace  title = "办事人员和有关人员"                       if occ_7 == 3
replace  title = "商业、服务业人员"                         if occ_7 == 4
replace  title = "农、林、牧、渔、水利业生产人员"           if occ_7 == 5
replace  title = "生产、运输设备操作人员及有关人员"         if occ_7 == 6
replace  title = "不便分类的其他劳动者"                     if occ_7 == 7


rename (num_longtable num) (longtable paper)

* Replace each column by its share (in percent) of the year total.
* Original note: "no change for 2015" -- presumably because the 2015
* long-table figures are already percentages; confirm.
foreach col in longtable paper {
	bysort year: egen tot_`col' = total(`col')
	generate pct_`col' = `col' / tot_`col' * 100
	drop `col' tot_`col'
	rename pct_`col' `col'
	format `col' %9.3f
}

* One row per occupation group, one longtable/paper column pair per year
reshape wide longtable paper, i(occ_7 title) j(year)

drop occ_7
order title longtable* paper*

list

* Remove the intermediate files written by Step 1
erase tabledata1.dta
erase tabledata2.dta

log close

